
<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  <head>
    <meta charset="utf-8" />
    <title>UCTB.dataset.dataset &#8212; UCTB  documentation</title>
    <link rel="stylesheet" href="../../../_static/nature.css" type="text/css" />
    <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
    <script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
    <script type="text/javascript" src="../../../_static/jquery.js"></script>
    <script type="text/javascript" src="../../../_static/underscore.js"></script>
    <script type="text/javascript" src="../../../_static/doctools.js"></script>
    <script type="text/javascript" src="../../../_static/language_data.js"></script>
    <link rel="index" title="Index" href="../../../genindex.html" />
    <link rel="search" title="Search" href="../../../search.html" /> 
  </head><body>
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="../../../genindex.html" title="General Index"
             accesskey="I">index</a></li>
        <li class="right" >
          <a href="../../../py-modindex.html" title="Python Module Index"
             >modules</a> |</li>
        <li class="nav-item nav-item-0"><a href="../../../index.html">UCTB  documentation</a> &#187;</li>
          <li class="nav-item nav-item-1"><a href="../../index.html" accesskey="U">Module code</a> &#187;</li> 
      </ul>
    </div>  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body" role="main">
            
  <h1>Source code for UCTB.dataset.dataset</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">wget</span>
<span class="kn">import</span> <span class="nn">pickle</span>
<span class="kn">import</span> <span class="nn">tarfile</span>


<div class="viewcode-block" id="DataSet"><a class="viewcode-back" href="../../../UCTB.dataset.html#UCTB.dataset.dataset.DataSet">[docs]</a><span class="k">class</span> <span class="nc">DataSet</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;An object storing basic data from a formatted pickle file.</span>

<span class="sd">    See also `Build your own datasets &lt;https://di-chai.github.io/UCTB/static/tutorial.html&gt;`_.</span>

<span class="sd">    Args:</span>
<span class="sd">        dataset (str): A string containing the path of the dataset pickle file, or the name of the dataset.</span>
<span class="sd">        city (str or ``None``): ``None`` if ``dataset`` is a file path, otherwise the name of the city. Default: ``None``</span>
<span class="sd">        data_dir (str or ``None``): The dataset directory. If set to ``None``, a directory will be created.</span>
<span class="sd">            If ``dataset`` is a file path, ``data_dir`` should be ``None`` too. Default: ``None``</span>

<span class="sd">    Attributes:</span>
<span class="sd">        data (dict): The data directly from the pickle file. ``data`` may have a ``data[&#39;contribute_data&#39;]`` dict to</span>
<span class="sd">            store supplementary data.</span>
<span class="sd">        time_range (list): From ``data[&#39;TimeRange&#39;]`` in the format of [YYYY-MM-DD, YYYY-MM-DD] indicating the time</span>
<span class="sd">            range of the data.</span>
<span class="sd">        time_fitness (int): From ``data[&#39;TimeFitness&#39;]`` indicating how many minutes a single time slot spans.</span>
<span class="sd">        node_traffic (np.ndarray): Data recording the main stream data of the nodes during the time range.</span>
<span class="sd">            From ``data[&#39;Node&#39;][&#39;TrafficNode&#39;]`` with shape as [time_slot_num, node_num].</span>
<span class="sd">        node_monthly_interaction (np.ndarray): Data recording the monthly interaction of pairs of nodes.</span>
<span class="sd">            Its shape is [month_num, node_num, node_num]. It&#39;s from ``data[&#39;Node&#39;][&#39;TrafficMonthlyInteraction&#39;]``</span>
<span class="sd">            and is used to build interaction graph.</span>
<span class="sd">            It&#39;s an optional attribute and can be set as an empty list if interaction graph is not needed.</span>
<span class="sd">        node_station_info (dict): A dict storing the coordinates of nodes. It shall be formatted as {id (may be</span>
<span class="sd">            arbitrary): [id (when sorted, should be consistent with index of ``node_traffic``), latitude, longitude,</span>
<span class="sd">            other notes]}. It&#39;s from ``data[&#39;Node&#39;][&#39;StationInfo&#39;]`` and is used to build distance graph.</span>
<span class="sd">            It&#39;s an optional attribute and can be set as an empty list if distance graph is not needed.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">city</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">dataset</span> <span class="o">=</span> <span class="n">dataset</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">city</span> <span class="o">=</span> <span class="n">city</span>

        <span class="k">if</span> <span class="n">data_dir</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">data_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">))),</span> <span class="s1">&#39;data&#39;</span><span class="p">)</span>

        <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isdir</span><span class="p">(</span><span class="n">data_dir</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
            <span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">data_dir</span><span class="p">)</span>

        <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">city</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">pkl_file_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="s1">&#39;</span><span class="si">{}</span><span class="s1">_</span><span class="si">{}</span><span class="s1">.pkl&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dataset</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">city</span><span class="p">))</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">pkl_file_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dataset</span>

        <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">pkl_file_name</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
            <span class="k">try</span><span class="p">:</span>
                <span class="n">tar_file_name</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="s1">&#39;</span><span class="si">{}</span><span class="s1">_</span><span class="si">{}</span><span class="s1">.tar.gz&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dataset</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">city</span><span class="p">))</span>
                <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">tar_file_name</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
                    <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Downloading data into&#39;</span><span class="p">,</span> <span class="n">data_dir</span><span class="p">)</span>
                    <span class="n">wget</span><span class="o">.</span><span class="n">download</span><span class="p">(</span><span class="s1">&#39;https://github.com/Di-Chai/UCTB_Data/blob/master/</span><span class="si">%s</span><span class="s1">_</span><span class="si">%s</span><span class="s1">.tar.gz?raw=true&#39;</span> <span class="o">%</span>
                                  <span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">city</span><span class="p">),</span> <span class="n">tar_file_name</span><span class="p">)</span>
                    <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Download succeed&#39;</span><span class="p">)</span>
                <span class="k">else</span><span class="p">:</span>
                    <span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Found&#39;</span><span class="p">,</span> <span class="n">tar_file_name</span><span class="p">)</span>
                <span class="n">tar</span> <span class="o">=</span> <span class="n">tarfile</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">tar_file_name</span><span class="p">,</span> <span class="s2">&quot;r:gz&quot;</span><span class="p">)</span>
                <span class="n">file_names</span> <span class="o">=</span> <span class="n">tar</span><span class="o">.</span><span class="n">getnames</span><span class="p">()</span>
                <span class="k">for</span> <span class="n">file_name</span> <span class="ow">in</span> <span class="n">file_names</span><span class="p">:</span>
                    <span class="n">tar</span><span class="o">.</span><span class="n">extract</span><span class="p">(</span><span class="n">file_name</span><span class="p">,</span> <span class="n">data_dir</span><span class="p">)</span>
                <span class="n">tar</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
                <span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tar_file_name</span><span class="p">)</span>
            <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
                <span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
                <span class="k">raise</span> <span class="ne">FileExistsError</span><span class="p">(</span><span class="s1">&#39;Download Failed&#39;</span><span class="p">)</span>

        <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">pkl_file_name</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">time_range</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;TimeRange&#39;</span><span class="p">]</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">time_fitness</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;TimeFitness&#39;</span><span class="p">]</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">node_traffic</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;Node&#39;</span><span class="p">][</span><span class="s1">&#39;TrafficNode&#39;</span><span class="p">]</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">node_monthly_interaction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;Node&#39;</span><span class="p">][</span><span class="s1">&#39;TrafficMonthlyInteraction&#39;</span><span class="p">]</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">node_station_info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;Node&#39;</span><span class="p">][</span><span class="s1">&#39;StationInfo&#39;</span><span class="p">]</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">grid_traffic</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;Grid&#39;</span><span class="p">][</span><span class="s1">&#39;TrafficGrid&#39;</span><span class="p">]</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">grid_lat_lng</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;Grid&#39;</span><span class="p">][</span><span class="s1">&#39;GridLatLng&#39;</span><span class="p">]</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">external_feature_weather</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">&#39;ExternalFeature&#39;</span><span class="p">][</span><span class="s1">&#39;Weather&#39;</span><span class="p">]</span></div>
</pre></div>

          </div>
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
<div id="searchbox" style="display: none" role="search">
  <h3 id="searchlabel">Quick search</h3>
    <div class="searchformwrapper">
    <form class="search" action="../../../search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" />
      <input type="submit" value="Go" />
    </form>
    </div>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="../../../genindex.html" title="General Index"
             >index</a></li>
        <li class="right" >
          <a href="../../../py-modindex.html" title="Python Module Index"
             >modules</a> |</li>
        <li class="nav-item nav-item-0"><a href="../../../index.html">UCTB  documentation</a> &#187;</li>
          <li class="nav-item nav-item-1"><a href="../../index.html" >Module code</a> &#187;</li> 
      </ul>
    </div>
    <div class="footer" role="contentinfo">
        &#169; Copyright 2019, Di Chai, Leye Wang, Jin Xu, Wenjie Yang, Liyue Chen.
      Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 2.2.1.
    </div>
  </body>
</html>